-
Notifications
You must be signed in to change notification settings - Fork 15.1k
[DAG] foldABSToABD - fallback to value tracking if the (ABS (SUB LHS, RHS)) operands aren't extended #147053
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
|
@llvm/pr-subscribers-backend-aarch64 Author: Simon Pilgrim (RKSimon) Changes: ISD::ABDS can be used if the subtraction will not overflow (this is an extension to handle cases where the NSW flag has been lost). ISD::ABDU can be used if both operands have at least 1 zero sign bit. Fixes #147049 Full diff: https://github.com/llvm/llvm-project/pull/147053.diff 3 Files Affected:
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 586eb2f3cf45e..1556e893ca050 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -11402,16 +11402,25 @@ SDValue DAGCombiner::foldABSToABD(SDNode *N, const SDLoc &DL) {
SDValue AbsOp0 = N->getOperand(0);
unsigned Opc0 = Op0.getOpcode();
- // Check if the operands of the sub are (zero|sign)-extended.
- // TODO: Should we use ValueTracking instead?
+ // Check if the operands of the sub are (zero|sign)-extended, otherwise
+ // fallback to ValueTracking.
if (Opc0 != Op1.getOpcode() ||
(Opc0 != ISD::ZERO_EXTEND && Opc0 != ISD::SIGN_EXTEND &&
Opc0 != ISD::SIGN_EXTEND_INREG)) {
// fold (abs (sub nsw x, y)) -> abds(x, y)
- // Don't fold this for unsupported types as we lose the NSW handling.
- if (AbsOp0->getFlags().hasNoSignedWrap() && hasOperation(ISD::ABDS, VT) &&
- TLI.preferABDSToABSWithNSW(VT)) {
- SDValue ABD = DAG.getNode(ISD::ABDS, DL, VT, Op0, Op1);
+ if (hasOperation(ISD::ABDS, VT)) {
+ // Don't fold this for unsupported types as we lose the NSW handling.
+ if (TLI.preferABDSToABSWithNSW(VT) &&
+ (AbsOp0->getFlags().hasNoSignedWrap() ||
+ DAG.willNotOverflowSub(/*IsSigned=*/true, Op0, Op1))) {
+ SDValue ABD = DAG.getNode(ISD::ABDS, DL, VT, Op0, Op1);
+ return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
+ }
+ }
+ // fold (abs (sub x, y)) -> abdu(x, y)
+ if (hasOperation(ISD::ABDU, VT) && DAG.SignBitIsZero(Op0) &&
+ DAG.SignBitIsZero(Op1)) {
+ SDValue ABD = DAG.getNode(ISD::ABDU, DL, VT, Op0, Op1);
return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
}
return SDValue();
diff --git a/llvm/test/CodeGen/AArch64/abd-combine.ll b/llvm/test/CodeGen/AArch64/abd-combine.ll
index e48680f4be98b..d0257890d2c43 100644
--- a/llvm/test/CodeGen/AArch64/abd-combine.ll
+++ b/llvm/test/CodeGen/AArch64/abd-combine.ll
@@ -18,13 +18,11 @@ define <8 x i16> @abdu_const(<8 x i16> %src1) {
; CHECK-LABEL: abdu_const:
; CHECK: // %bb.0:
; CHECK-NEXT: movi v1.4s, #1
-; CHECK-NEXT: ushll2 v2.4s, v0.8h, #0
-; CHECK-NEXT: ushll v0.4s, v0.4h, #0
-; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: sub v1.4s, v2.4s, v1.4s
-; CHECK-NEXT: abs v1.4s, v1.4s
-; CHECK-NEXT: abs v0.4s, v0.4s
-; CHECK-NEXT: uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: ushll v2.4s, v0.4h, #0
+; CHECK-NEXT: ushll2 v0.4s, v0.8h, #0
+; CHECK-NEXT: uabd v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: uabd v1.4s, v2.4s, v1.4s
+; CHECK-NEXT: uzp1 v0.8h, v1.8h, v0.8h
; CHECK-NEXT: ret
%zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
%sub = sub <8 x i32> %zextsrc1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
@@ -37,10 +35,10 @@ define <8 x i16> @abdu_const_lhs(<8 x i16> %src1) {
; CHECK-LABEL: abdu_const_lhs:
; CHECK: // %bb.0:
; CHECK-NEXT: movi v1.4s, #1
-; CHECK-NEXT: usubw v2.4s, v1.4s, v0.4h
-; CHECK-NEXT: usubw2 v0.4s, v1.4s, v0.8h
-; CHECK-NEXT: abs v0.4s, v0.4s
-; CHECK-NEXT: abs v1.4s, v2.4s
+; CHECK-NEXT: ushll v2.4s, v0.4h, #0
+; CHECK-NEXT: ushll2 v0.4s, v0.8h, #0
+; CHECK-NEXT: uabd v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: uabd v1.4s, v2.4s, v1.4s
; CHECK-NEXT: uzp1 v0.8h, v1.8h, v0.8h
; CHECK-NEXT: ret
%zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
@@ -53,13 +51,6 @@ define <8 x i16> @abdu_const_lhs(<8 x i16> %src1) {
define <8 x i16> @abdu_const_zero(<8 x i16> %src1) {
; CHECK-LABEL: abdu_const_zero:
; CHECK: // %bb.0:
-; CHECK-NEXT: movi v1.2d, #0000000000000000
-; CHECK-NEXT: ushll v2.4s, v0.4h, #0
-; CHECK-NEXT: usubw2 v0.4s, v1.4s, v0.8h
-; CHECK-NEXT: neg v1.4s, v2.4s
-; CHECK-NEXT: abs v0.4s, v0.4s
-; CHECK-NEXT: abs v1.4s, v1.4s
-; CHECK-NEXT: uzp1 v0.8h, v1.8h, v0.8h
; CHECK-NEXT: ret
%zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
%sub = sub <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>, %zextsrc1
@@ -328,13 +319,11 @@ define <8 x i16> @abds_const(<8 x i16> %src1) {
; CHECK-LABEL: abds_const:
; CHECK: // %bb.0:
; CHECK-NEXT: movi v1.4s, #1
-; CHECK-NEXT: sshll2 v2.4s, v0.8h, #0
-; CHECK-NEXT: sshll v0.4s, v0.4h, #0
-; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: sub v1.4s, v2.4s, v1.4s
-; CHECK-NEXT: abs v1.4s, v1.4s
-; CHECK-NEXT: abs v0.4s, v0.4s
-; CHECK-NEXT: uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: sshll v2.4s, v0.4h, #0
+; CHECK-NEXT: sshll2 v0.4s, v0.8h, #0
+; CHECK-NEXT: sabd v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: sabd v1.4s, v2.4s, v1.4s
+; CHECK-NEXT: uzp1 v0.8h, v1.8h, v0.8h
; CHECK-NEXT: ret
%zextsrc1 = sext <8 x i16> %src1 to <8 x i32>
%sub = sub <8 x i32> %zextsrc1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
@@ -347,10 +336,10 @@ define <8 x i16> @abds_const_lhs(<8 x i16> %src1) {
; CHECK-LABEL: abds_const_lhs:
; CHECK: // %bb.0:
; CHECK-NEXT: movi v1.4s, #1
-; CHECK-NEXT: ssubw v2.4s, v1.4s, v0.4h
-; CHECK-NEXT: ssubw2 v0.4s, v1.4s, v0.8h
-; CHECK-NEXT: abs v0.4s, v0.4s
-; CHECK-NEXT: abs v1.4s, v2.4s
+; CHECK-NEXT: sshll v2.4s, v0.4h, #0
+; CHECK-NEXT: sshll2 v0.4s, v0.8h, #0
+; CHECK-NEXT: sabd v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: sabd v1.4s, v2.4s, v1.4s
; CHECK-NEXT: uzp1 v0.8h, v1.8h, v0.8h
; CHECK-NEXT: ret
%zextsrc1 = sext <8 x i16> %src1 to <8 x i32>
@@ -363,10 +352,8 @@ define <8 x i16> @abds_const_lhs(<8 x i16> %src1) {
define <8 x i16> @abds_const_zero(<8 x i16> %src1) {
; CHECK-LABEL: abds_const_zero:
; CHECK: // %bb.0:
-; CHECK-NEXT: movi v1.2d, #0000000000000000
-; CHECK-NEXT: sshll v2.4s, v0.4h, #0
-; CHECK-NEXT: ssubw2 v0.4s, v1.4s, v0.8h
-; CHECK-NEXT: neg v1.4s, v2.4s
+; CHECK-NEXT: sshll v1.4s, v0.4h, #0
+; CHECK-NEXT: sshll2 v0.4s, v0.8h, #0
; CHECK-NEXT: abs v0.4s, v0.4s
; CHECK-NEXT: abs v1.4s, v1.4s
; CHECK-NEXT: uzp1 v0.8h, v1.8h, v0.8h
diff --git a/llvm/test/CodeGen/AArch64/sve-abd.ll b/llvm/test/CodeGen/AArch64/sve-abd.ll
index 72790155d046f..92bbd4e7f2759 100644
--- a/llvm/test/CodeGen/AArch64/sve-abd.ll
+++ b/llvm/test/CodeGen/AArch64/sve-abd.ll
@@ -283,8 +283,7 @@ define <vscale x 4 x i32> @uabd_non_matching_promotion(<vscale x 4 x i8> %a, <vs
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: and z0.s, z0.s, #0xff
; CHECK-NEXT: sxtb z1.s, p0/m, z1.s
-; CHECK-NEXT: sub z0.s, z0.s, z1.s
-; CHECK-NEXT: abs z0.s, p0/m, z0.s
+; CHECK-NEXT: sabd z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: ret
%a.zext = zext <vscale x 4 x i8> %a to <vscale x 4 x i32>
%b.zext = sext <vscale x 4 x i8> %b to <vscale x 4 x i32>
|
|
@llvm/pr-subscribers-llvm-selectiondag Author: Simon Pilgrim (RKSimon) Changes: ISD::ABDS can be used if the subtraction will not overflow (this is an extension to handle cases where the NSW flag has been lost). ISD::ABDU can be used if both operands have at least 1 zero sign bit. Fixes #147049 Full diff: https://github.com/llvm/llvm-project/pull/147053.diff 3 Files Affected:
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 586eb2f3cf45e..1556e893ca050 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -11402,16 +11402,25 @@ SDValue DAGCombiner::foldABSToABD(SDNode *N, const SDLoc &DL) {
SDValue AbsOp0 = N->getOperand(0);
unsigned Opc0 = Op0.getOpcode();
- // Check if the operands of the sub are (zero|sign)-extended.
- // TODO: Should we use ValueTracking instead?
+ // Check if the operands of the sub are (zero|sign)-extended, otherwise
+ // fallback to ValueTracking.
if (Opc0 != Op1.getOpcode() ||
(Opc0 != ISD::ZERO_EXTEND && Opc0 != ISD::SIGN_EXTEND &&
Opc0 != ISD::SIGN_EXTEND_INREG)) {
// fold (abs (sub nsw x, y)) -> abds(x, y)
- // Don't fold this for unsupported types as we lose the NSW handling.
- if (AbsOp0->getFlags().hasNoSignedWrap() && hasOperation(ISD::ABDS, VT) &&
- TLI.preferABDSToABSWithNSW(VT)) {
- SDValue ABD = DAG.getNode(ISD::ABDS, DL, VT, Op0, Op1);
+ if (hasOperation(ISD::ABDS, VT)) {
+ // Don't fold this for unsupported types as we lose the NSW handling.
+ if (TLI.preferABDSToABSWithNSW(VT) &&
+ (AbsOp0->getFlags().hasNoSignedWrap() ||
+ DAG.willNotOverflowSub(/*IsSigned=*/true, Op0, Op1))) {
+ SDValue ABD = DAG.getNode(ISD::ABDS, DL, VT, Op0, Op1);
+ return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
+ }
+ }
+ // fold (abs (sub x, y)) -> abdu(x, y)
+ if (hasOperation(ISD::ABDU, VT) && DAG.SignBitIsZero(Op0) &&
+ DAG.SignBitIsZero(Op1)) {
+ SDValue ABD = DAG.getNode(ISD::ABDU, DL, VT, Op0, Op1);
return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
}
return SDValue();
diff --git a/llvm/test/CodeGen/AArch64/abd-combine.ll b/llvm/test/CodeGen/AArch64/abd-combine.ll
index e48680f4be98b..d0257890d2c43 100644
--- a/llvm/test/CodeGen/AArch64/abd-combine.ll
+++ b/llvm/test/CodeGen/AArch64/abd-combine.ll
@@ -18,13 +18,11 @@ define <8 x i16> @abdu_const(<8 x i16> %src1) {
; CHECK-LABEL: abdu_const:
; CHECK: // %bb.0:
; CHECK-NEXT: movi v1.4s, #1
-; CHECK-NEXT: ushll2 v2.4s, v0.8h, #0
-; CHECK-NEXT: ushll v0.4s, v0.4h, #0
-; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: sub v1.4s, v2.4s, v1.4s
-; CHECK-NEXT: abs v1.4s, v1.4s
-; CHECK-NEXT: abs v0.4s, v0.4s
-; CHECK-NEXT: uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: ushll v2.4s, v0.4h, #0
+; CHECK-NEXT: ushll2 v0.4s, v0.8h, #0
+; CHECK-NEXT: uabd v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: uabd v1.4s, v2.4s, v1.4s
+; CHECK-NEXT: uzp1 v0.8h, v1.8h, v0.8h
; CHECK-NEXT: ret
%zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
%sub = sub <8 x i32> %zextsrc1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
@@ -37,10 +35,10 @@ define <8 x i16> @abdu_const_lhs(<8 x i16> %src1) {
; CHECK-LABEL: abdu_const_lhs:
; CHECK: // %bb.0:
; CHECK-NEXT: movi v1.4s, #1
-; CHECK-NEXT: usubw v2.4s, v1.4s, v0.4h
-; CHECK-NEXT: usubw2 v0.4s, v1.4s, v0.8h
-; CHECK-NEXT: abs v0.4s, v0.4s
-; CHECK-NEXT: abs v1.4s, v2.4s
+; CHECK-NEXT: ushll v2.4s, v0.4h, #0
+; CHECK-NEXT: ushll2 v0.4s, v0.8h, #0
+; CHECK-NEXT: uabd v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: uabd v1.4s, v2.4s, v1.4s
; CHECK-NEXT: uzp1 v0.8h, v1.8h, v0.8h
; CHECK-NEXT: ret
%zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
@@ -53,13 +51,6 @@ define <8 x i16> @abdu_const_lhs(<8 x i16> %src1) {
define <8 x i16> @abdu_const_zero(<8 x i16> %src1) {
; CHECK-LABEL: abdu_const_zero:
; CHECK: // %bb.0:
-; CHECK-NEXT: movi v1.2d, #0000000000000000
-; CHECK-NEXT: ushll v2.4s, v0.4h, #0
-; CHECK-NEXT: usubw2 v0.4s, v1.4s, v0.8h
-; CHECK-NEXT: neg v1.4s, v2.4s
-; CHECK-NEXT: abs v0.4s, v0.4s
-; CHECK-NEXT: abs v1.4s, v1.4s
-; CHECK-NEXT: uzp1 v0.8h, v1.8h, v0.8h
; CHECK-NEXT: ret
%zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
%sub = sub <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>, %zextsrc1
@@ -328,13 +319,11 @@ define <8 x i16> @abds_const(<8 x i16> %src1) {
; CHECK-LABEL: abds_const:
; CHECK: // %bb.0:
; CHECK-NEXT: movi v1.4s, #1
-; CHECK-NEXT: sshll2 v2.4s, v0.8h, #0
-; CHECK-NEXT: sshll v0.4s, v0.4h, #0
-; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: sub v1.4s, v2.4s, v1.4s
-; CHECK-NEXT: abs v1.4s, v1.4s
-; CHECK-NEXT: abs v0.4s, v0.4s
-; CHECK-NEXT: uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: sshll v2.4s, v0.4h, #0
+; CHECK-NEXT: sshll2 v0.4s, v0.8h, #0
+; CHECK-NEXT: sabd v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: sabd v1.4s, v2.4s, v1.4s
+; CHECK-NEXT: uzp1 v0.8h, v1.8h, v0.8h
; CHECK-NEXT: ret
%zextsrc1 = sext <8 x i16> %src1 to <8 x i32>
%sub = sub <8 x i32> %zextsrc1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
@@ -347,10 +336,10 @@ define <8 x i16> @abds_const_lhs(<8 x i16> %src1) {
; CHECK-LABEL: abds_const_lhs:
; CHECK: // %bb.0:
; CHECK-NEXT: movi v1.4s, #1
-; CHECK-NEXT: ssubw v2.4s, v1.4s, v0.4h
-; CHECK-NEXT: ssubw2 v0.4s, v1.4s, v0.8h
-; CHECK-NEXT: abs v0.4s, v0.4s
-; CHECK-NEXT: abs v1.4s, v2.4s
+; CHECK-NEXT: sshll v2.4s, v0.4h, #0
+; CHECK-NEXT: sshll2 v0.4s, v0.8h, #0
+; CHECK-NEXT: sabd v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: sabd v1.4s, v2.4s, v1.4s
; CHECK-NEXT: uzp1 v0.8h, v1.8h, v0.8h
; CHECK-NEXT: ret
%zextsrc1 = sext <8 x i16> %src1 to <8 x i32>
@@ -363,10 +352,8 @@ define <8 x i16> @abds_const_lhs(<8 x i16> %src1) {
define <8 x i16> @abds_const_zero(<8 x i16> %src1) {
; CHECK-LABEL: abds_const_zero:
; CHECK: // %bb.0:
-; CHECK-NEXT: movi v1.2d, #0000000000000000
-; CHECK-NEXT: sshll v2.4s, v0.4h, #0
-; CHECK-NEXT: ssubw2 v0.4s, v1.4s, v0.8h
-; CHECK-NEXT: neg v1.4s, v2.4s
+; CHECK-NEXT: sshll v1.4s, v0.4h, #0
+; CHECK-NEXT: sshll2 v0.4s, v0.8h, #0
; CHECK-NEXT: abs v0.4s, v0.4s
; CHECK-NEXT: abs v1.4s, v1.4s
; CHECK-NEXT: uzp1 v0.8h, v1.8h, v0.8h
diff --git a/llvm/test/CodeGen/AArch64/sve-abd.ll b/llvm/test/CodeGen/AArch64/sve-abd.ll
index 72790155d046f..92bbd4e7f2759 100644
--- a/llvm/test/CodeGen/AArch64/sve-abd.ll
+++ b/llvm/test/CodeGen/AArch64/sve-abd.ll
@@ -283,8 +283,7 @@ define <vscale x 4 x i32> @uabd_non_matching_promotion(<vscale x 4 x i8> %a, <vs
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: and z0.s, z0.s, #0xff
; CHECK-NEXT: sxtb z1.s, p0/m, z1.s
-; CHECK-NEXT: sub z0.s, z0.s, z1.s
-; CHECK-NEXT: abs z0.s, p0/m, z0.s
+; CHECK-NEXT: sabd z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: ret
%a.zext = zext <vscale x 4 x i8> %a to <vscale x 4 x i32>
%b.zext = sext <vscale x 4 x i8> %b to <vscale x 4 x i32>
|
332e9b7 to
ada6757
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
… RHS)) operands aren't extended. ISD::ABDS can be used if the subtraction will not overflow (this is an extension to handle cases where the NSW flag has been lost). ISD::ABDU can be used if both operands have at least 1 zero sign bit. Fixes llvm#147049
ada6757 to
abc561d
Compare
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/197/builds/6701 Here is the relevant piece of the build log for the reference |
ISD::ABDS can be used if the signed subtraction will not overflow (this is an extension to handle cases where the NSW flag has been lost).
ISD::ABDU can be used if both operands have at least 1 zero sign bit.
Fixes #147049